/* Program to run regressions for Notch Example */

#delimit';'
set memory 2000m;
set more off;

/* Open the data file */
use "cps_ss_final",replace;
sum;


/* Get means first to compute elasticities */
sum sharlive incsstotcpi inst [w=wtsupp];
gen qincssnew_2 = qincssnew>0 if qincssnew!=.;
bysort qincssnew_2: sum sharlive incsstotcpi inst [w=wtsupp];


/* First show estimates for full, non-imputed, and imputed samples */
/* FIRST STAGE AND REDUCED FORM RESULTS DO NOT APPEAR IN PAPER */
/* INSTEAD - those results are from the residualized versions below which are full sample residuals estimated separately for the non-imputed and imputed groups */ 
/* OLS */
xi: reg sharlive incsstotcpi i.ageh i.educ0 i.division i.marstnew agespouse hsgradsp somecollsp collgradsp i.year [w=wtsupp], cluster(yobh);
xi: reg sharlive incsstotcpi i.ageh i.educ0 i.division i.marstnew agespouse hsgradsp somecollsp collgradsp i.year if qincssnew==0 [w=wtsupp], cluster(yobh);
xi: reg sharlive incsstotcpi i.ageh i.educ0 i.division i.marstnew agespouse hsgradsp somecollsp collgradsp i.year if qincssnew>0 [w=wtsupp], cluster(yobh);

/* First stage */
xi: reg incsstotcpi inst i.ageh i.educ0 i.division i.marstnew agespouse hsgradsp somecollsp collgradsp i.year [w=wtsupp], cluster(yobh);
xi: reg incsstotcpi inst i.ageh i.educ0 i.division i.marstnew agespouse hsgradsp somecollsp collgradsp i.year if qincssnew==0 [w=wtsupp], cluster(yobh);
xi: reg incsstotcpi inst i.ageh i.educ0 i.division i.marstnew agespouse hsgradsp somecollsp collgradsp i.year if qincssnew>0 [w=wtsupp], cluster(yobh);

/* Reduced form */
xi: reg sharlive inst i.ageh i.educ0 i.division i.marstnew agespouse hsgradsp somecollsp collgradsp i.year [w=wtsupp], cluster(yobh);
xi: reg sharlive inst i.ageh i.educ0 i.division i.marstnew agespouse hsgradsp somecollsp collgradsp i.year if qincssnew==0 [w=wtsupp], cluster(yobh);
xi: reg sharlive inst i.ageh i.educ0 i.division i.marstnew agespouse hsgradsp somecollsp collgradsp i.year if qincssnew>0 [w=wtsupp], cluster(yobh);

/* 2SLS */
xi: ivregress 2sls sharlive (incsstotcpi = inst) i.ageh i.educ0 i.division i.marstnew agespouse hsgradsp somecollsp collgradsp i.year [w=wtsupp], cluster(yobh);
xi: ivregress 2sls sharlive (incsstotcpi = inst) i.ageh i.educ0 i.division i.marstnew agespouse hsgradsp somecollsp collgradsp i.year if qincssnew==0 [w=wtsupp], cluster(yobh);
xi: ivregress 2sls sharlive (incsstotcpi = inst) i.ageh i.educ0 i.division i.marstnew agespouse hsgradsp somecollsp collgradsp i.year if qincssnew>0 [w=wtsupp], cluster(yobh);


/* Get IPW results - use exogenous variables */
gen nonimpute = qincssnew==0 if qincssnew!=.;
xi: probit nonimpute i.ageh i.educ0 i.division i.marstnew agespouse hsgradsp somecollsp collgradsp i.year [pw=wtsupp];

predict prob_non_impute; /* Get probability of being non-imputed */

bysort nonimpute: sum prob_non_impute, detail;
gen wtsupp_ipw = wtsupp*(1/prob_non_impute);

/* OLS */
xi: reg sharlive incsstotcpi i.ageh i.educ0 i.division i.marstnew agespouse hsgradsp somecollsp collgradsp i.year if nonimpute==1 [w=wtsupp_ipw], cluster(yobh);
/* First stage */
xi: reg incsstotcpi inst i.ageh i.educ0 i.division i.marstnew agespouse hsgradsp somecollsp collgradsp i.year if nonimpute==1 [w=wtsupp_ipw], cluster(yobh);
/* Reduced form */
xi: reg sharlive inst i.ageh i.educ0 i.division i.marstnew agespouse hsgradsp somecollsp collgradsp i.year if nonimpute==1 [w=wtsupp_ipw], cluster(yobh);
/* 2SLS */
xi: ivregress 2sls sharlive (incsstotcpi = inst) i.ageh i.educ0 i.division i.marstnew agespouse hsgradsp somecollsp collgradsp i.year if nonimpute==1 [w=wtsupp_ipw], cluster(yobh);
drop nonimpute prob_non_impute wtsupp_ipw;




/* Repeat analysis but using residuals only!!! */
/* THIS WAY PUT IMPUTED AND NON-IMPUTED ON SAME FOOTING */
/* i.e., the impact of other regressors is the same */
/* BUT ONLY NEED RESIDUALIZED INSTRUMENT - NOT THE X OR Y VARIABLES */

/* Get means first to compute elasticities */
sum sharlive incsstotcpi inst [w=wtsupp];
bysort qincssnew_2: sum sharlive incsstotcpi inst [w=wtsupp];

/* First get residuals */
for var sharlive incsstotcpi inst: gen X_adj = X;
/*
for var sharlive incsstotcpi inst:
      xi: reg X i.ageh i.educ0 i.division i.marstnew agespouse hsgradsp somecollsp collgradsp i.year [w=wtsupp], cluster(yobh) \
      predict X_adj if e(sample), resid;
*/

/* OLS */
xi: reg sharlive_adj incsstotcpi_adj [w=wtsupp], cluster(yobh);
xi: reg sharlive_adj incsstotcpi_adj if qincssnew==0 [w=wtsupp], cluster(yobh);
xi: reg sharlive_adj incsstotcpi_adj if qincssnew>0 [w=wtsupp], cluster(yobh);

/* First stage  */
xi: reg incsstotcpi_adj  inst_adj [w=wtsupp], cluster(yobh);
scalar pi_total = _b[inst_adj];
xi: reg incsstotcpi_adj  inst_adj if qincssnew==0 [w=wtsupp], cluster(yobh);
scalar pi_non_impute = _b[inst_adj];
xi: reg incsstotcpi_adj  inst_adj if qincssnew>0 [w=wtsupp], cluster(yobh);
scalar pi_imputed = _b[inst_adj];

/* Reduced form  */
xi: reg sharlive_adj inst_adj [w=wtsupp], cluster(yobh);
scalar delta_total = _b[inst_adj];
xi: reg sharlive_adj inst_adj if qincssnew==0 [w=wtsupp], cluster(yobh);
scalar delta_non_impute = _b[inst_adj];
xi: reg sharlive_adj inst_adj if qincssnew>0 [w=wtsupp], cluster(yobh);
scalar delta_imputed = _b[inst_adj];

/* 2SLS */
xi: ivregress 2sls sharlive_adj (incsstotcpi_adj = inst_adj) [w=wtsupp], cluster(yobh);
xi: ivregress 2sls sharlive_adj (incsstotcpi_adj = inst_adj) if qincssnew==0 [w=wtsupp], cluster(yobh);
xi: ivregress 2sls sharlive_adj (incsstotcpi_adj = inst_adj) if qincssnew>0 [w=wtsupp], cluster(yobh);


/* Get decomposition weights using sample weights */
/* For unweighted decomposition, set wtsupp=1 at beginning of program */
/* Stata transforms weights v_i to w_i = v_i*(n/V) */
/* where n is sample size and V is sum of weights */
/* Thus, variation is s^2*(((n-1)/n)*V) instead of s^2*(n-1) */
sum inst_adj [w=wtsupp];
scalar total_variation = r(Var)*(((r(N)-1)/r(N))*r(sum_w));
scalar total_N = r(N);
scalar total_N_wghtd = r(sum_w);
scalar total_mean = r(mean);

sum inst_adj if qincssnew==0 [w=wtsupp];
scalar non_impute_variation = r(Var)*(((r(N)-1)/r(N))*r(sum_w));
scalar non_impute_N = r(N);
scalar non_impute_N_wghtd = r(sum_w);
scalar non_impute_mean = r(mean);

sum inst_adj if qincssnew>0 [w=wtsupp];
scalar imputed_variation = r(Var)*(((r(N)-1)/r(N))*r(sum_w));
scalar imputed_N = r(N);
scalar imputed_N_wghtd = r(sum_w);
scalar imputed_mean = r(mean);

scalar between_variation = non_impute_N_wghtd*(non_impute_mean - total_mean)^2 +
                           imputed_N_wghtd*(imputed_mean - total_mean)^2;
scalar total_variation_summed = non_impute_variation +
                                imputed_variation +
                                between_variation;
scalar compare = total_variation - total_variation_summed;

scalar weight_non_impute = non_impute_variation/total_variation;
scalar weight_imputed = imputed_variation/total_variation;
scalar weight_between = between_variation/total_variation;


/* Reduced form variation */
/* Need to apply Stata summarize transformation to weights v_i to w_i = v_i*(n/V) */
/* where n is sample size and V is sum of weights */
/* This way, match variation for instrument constructed above */
/* Thus, variation is cov*(((n-1)/n)*V) instead of cov*(n-1) */
corr inst_adj sharlive_adj [w=wtsupp], cov;
scalar total_variation_yz = r(cov_12)*(((r(N)-1)/r(N))*total_N_wghtd);
sum sharlive_adj [w=wtsupp];
scalar total_y_mean = r(mean);

corr inst_adj sharlive_adj if qincssnew==0 [w=wtsupp], cov;
scalar non_impute_variation_yz = r(cov_12)*(((r(N)-1)/r(N))*non_impute_N_wghtd);
sum sharlive_adj if qincssnew==0 [w=wtsupp];
scalar non_impute_y_mean = r(mean);

corr inst_adj sharlive_adj if qincssnew>0 [w=wtsupp], cov;
scalar imputed_variation_yz = r(cov_12)*(((r(N)-1)/r(N))*imputed_N_wghtd);
sum sharlive_adj if qincssnew>0 [w=wtsupp];
scalar imputed_y_mean = r(mean);

scalar between_variation_yz =
       non_impute_N_wghtd*(non_impute_mean - total_mean)*
        (non_impute_y_mean - total_y_mean) +
       imputed_N_wghtd*(imputed_mean - total_mean)*
        (imputed_y_mean - total_y_mean);

scalar total_variation_yz_summed = non_impute_variation_yz + imputed_variation_yz +
                                   between_variation_yz;

scalar delta_total_variation = total_variation_yz/total_variation;
scalar delta_non_impute_variation = non_impute_variation_yz/non_impute_variation;
scalar delta_imputed_variation = imputed_variation_yz/imputed_variation;
scalar delta_between_variation = between_variation_yz/between_variation;

scalar delta_total_summed = weight_non_impute*delta_non_impute_variation +
                            weight_imputed*delta_imputed_variation +
                            weight_between*delta_between_variation;


/* First stage variation */
/* Need to apply Stata summarize transformation to weights v_i to w_i = v_i*(n/V) */
/* where n is sample size and V is sum of weights */
/* This way, match variation for instrument constructed above */
/* Thus, variation is cov*(((n-1)/n)*V) instead of cov*(n-1) */
corr inst_adj incsstotcpi_adj [w=wtsupp], cov;
scalar total_variation_xz = r(cov_12)*(((r(N)-1)/r(N))*total_N_wghtd);
sum incsstotcpi_adj [w=wtsupp];
scalar total_x_mean = r(mean);

corr inst_adj incsstotcpi_adj if qincssnew==0 [w=wtsupp], cov;
scalar non_impute_variation_xz = r(cov_12)*(((r(N)-1)/r(N))*non_impute_N_wghtd);
sum incsstotcpi_adj if qincssnew==0 [w=wtsupp];
scalar non_impute_x_mean = r(mean);

corr inst_adj incsstotcpi_adj if qincssnew>0 [w=wtsupp], cov;
scalar imputed_variation_xz = r(cov_12)*(((r(N)-1)/r(N))*imputed_N_wghtd);
sum incsstotcpi_adj if qincssnew>0 [w=wtsupp];
scalar imputed_x_mean = r(mean);

scalar between_variation_xz =
       non_impute_N_wghtd*(non_impute_mean - total_mean)*
        (non_impute_x_mean - total_x_mean) +
       imputed_N_wghtd*(imputed_mean - total_mean)*
        (imputed_x_mean - total_x_mean);

scalar total_variation_xz_summed = non_impute_variation_xz + imputed_variation_xz +
                                   between_variation_xz;

scalar pi_total_variation = total_variation_xz/total_variation;
scalar pi_non_impute_variation = non_impute_variation_xz/non_impute_variation;
scalar pi_imputed_variation = imputed_variation_xz/imputed_variation;
scalar pi_between_variation = between_variation_xz/between_variation;

scalar pi_total_summed = weight_non_impute*pi_non_impute_variation +
                         weight_imputed*pi_imputed_variation +
                         weight_between*pi_between_variation;


scalar beta_iv = delta_total/pi_total;
scalar beta_iv_summed = delta_total_summed/pi_total_summed;


scalar list; 


/* Drop residuals */
drop *_adj;
